* Generate Table 4
* Author: Xiao Wang
* Version: 07/2020

* Working directory. Defaults to the original author's path; to run the kit
* elsewhere, define the global TRI_REPLICATION_DIR before running this file, e.g.
*     global TRI_REPLICATION_DIR "D:/replication_kit"
if `"$TRI_REPLICATION_DIR"' == "" {
    global TRI_REPLICATION_DIR "C:\Users\Xiao\Box\TRI_relocation_project\Paper1\Replication_kit"
}
cd "$TRI_REPLICATION_DIR"
set more off

* Load the analysis data set from the working directory (replaces data in memory)
use data_ttest, clear

* -----------------------------------
* Table 4a. T-test on the diff. between origin and destination
* -----------------------------------                                                   
preserve

// Keep the observations with m_indc_ctcnty == 0 plus, within each facility,
// any observation where m_indc_ctcnty switches from 0 to 1.
bysort facilityID (year): gen kflag = 1 if m_indc_ctcnty == 1 & m_indc_ctcnty[_n - 1] == 0
keep if kflag == 1 | m_indc_ctcnty == 0
count if moving_ctcnty == 1

*************************
* Exactly one of the three sections below should be active at a time:
*   Section 1 (comment out Sections 2 and 3): t-test on all moves
*   Section 2 (comment out Sections 1 and 3): t-test on cross-county moves
*   Section 3 (comment out Sections 1 and 2): t-test on within-county moves
*************************

* Section 1: all moves
// af_moving_ctcnty is the previous observation's moving_ctcnty within a facility,
// so it equals 1 on the observation that follows a move.
capture drop af_moving_ctcnty
bysort facilityID (year): gen af_moving_ctcnty = moving_ctcnty[_n - 1]
count
count if moving_ctcnty == 1 & m_indc_ctcnty == 0
count if af_moving_ctcnty == 1
count if moving_ctcnty == 1 & m_indc_ctcnty == 0 & report_bf == 1

// retain only the move observation and the observation right after it
keep if moving_ctcnty == 1 | af_moving_ctcnty == 1
*

/* Section 2: cross-county moves only
capture drop af_moving_cnty
bysort facilityID (year): gen af_moving_cnty = moving_cnty[_n - 1]
count
count if moving_cnty == 1 & m_indc_ctcnty == 0
count if af_moving_cnty == 1

keep if moving_cnty == 1 | af_moving_cnty == 1
summarize totalrel if af_moving_cnty == 1, detail
*/

/* Section 3: within-county moves
capture drop af_moving_ct
bysort facilityID (year): gen af_moving_ct = moving_ct[_n - 1]
count if moving_ct == 1 & m_indc_ctcnty == 0
count if af_moving_ct == 1
count if af_moving_ct == 1 & totalrel > 0
count if af_moving_ct == 1 & totalrel > 12000

keep if moving_ct == 1 | af_moving_ct == 1
summarize totalrel if af_moving_ct == 1, detail
*/

// Differences, computed as destination minus origin.
// The first six subtract each origin variable from its *_dest counterpart on the same row.
local diffnames lgpopdensity_90diff2 lgpcinc_90diff2 edu390diff2 whitenonhis90diff2 votediff2 demo_ratediff2
local origvars  lgpopdensity_ctnb lgpcinc_ctnb edu3_rate_ct90nb whitenonhis_ratenb voterturnoutnb demo_ratenb
forvalues i = 1/6 {
    local newvar : word `i' of `diffnames'
    local src    : word `i' of `origvars'
    gen `newvar' = `src'_dest - `src'
}
// NA has no *_dest variable here: use the facility's next observation minus the current one
by facilityID: gen NAdiff2 = NA[_n + 1] - NA

xtset facilityID year
// Drop the second observation of each facility so that the first one, which now
// carries the differences, is the one left for the t-tests
// (presumably each facility contributes at most two rows at this point - verify with the count below)
by facilityID: drop if _n == 2
count
tabulate year

capture program drop odttest
program odttest
    // One-sample t-tests (H0: mean difference = 0) for every Table 4a difference
    // variable, first on the whole sample and then separately by report_bf.
    // Positional arguments:
    //   1 = exclusive upper bound on year
    //   2 = exclusive lower bound on year
    // Note: bysort re-sorts the data in memory by report_bf.
    args yr_upper yr_lower

    foreach diffvar in lgpcinc_90diff2 edu390diff2 whitenonhis90diff2 lgpopdensity_90diff2 votediff2 demo_ratediff2 NAdiff2 {
        ttest `diffvar' == 0 if year < `yr_upper' & year > `yr_lower'
        bysort report_bf: ttest `diffvar' == 0 if year < `yr_upper' & year > `yr_lower'
    }
end

// Run the Table 4a t-tests. Argument 1 is the exclusive upper bound on year and
// argument 2 the exclusive lower bound, so this call keeps 1990 < year < 2011.
odttest 2011 1990 // Change the two arguments to run the t-tests on relocations that happened between year 2 (lower) and year 1 (upper).

// Bring back the data set saved by the preserve at the start of Table 4a
restore 


* ------------------------------- 
** Table 4B. Score + T-test

// Generate the coefficients in column 3, table 2
// Variable lists used by the logit models below.
// x1, x2: enter every model with a one-period lag, l.($x1 $x2)
global x1 "lgemp"
global x2 "paydexdiff paydexmax"
 
// m1nb, m2nb, m3nb: enter contemporaneously; these are the variables whose
// coefficients are later used to build the origin/destination scores.
// m31nb is the subset of m3nb used in the within-county model (3).
global m1nb "lgpopdensity_ctnb"
global m2nb "lgpcinc_ctnb edu3_rate_ct90nb"
global m3nb "whitenonhis_ratenb voterturnoutnb demo_ratenb"
global m31nb "whitenonhis_ratenb"

// z1, z2nb, z3nb: enter with a one-period lag in models (1) and (2).
// Model (3) omits z1 and uses z31nb, which is z3nb without lgwageindcnty.
global z1 "NA"
global z2nb "NTRI_ctnb"
global z3nb "lgwageindcnty highwaymiles2001nb railmiles2003nb urban_rate_ctnb housingvalue_adj1990nb" 
global z31nb "highwaymiles2001nb railmiles2003nb urban_rate_ctnb housingvalue_adj1990nb"

// Store the post-relocation value of NA on the move observation: the facility's
// next observation (the destination in year t + 1 when the panel has no gaps)
sort facilityID year
by facilityID: gen NA_f1 = NA[_n + 1] if moving_ctcnty == 1

keep if m_indc_ctcnty == 0
sort facilityID year

// Remove spline terms left over from an earlier run, one variable at a time.
// A single "capture drop sp1 sp2 sp3 sp4 sp5" drops nothing when any one of the
// five is missing, and mkspline would then fail on the variables that remain.
foreach splinevar in sp1 sp2 sp3 sp4 sp5 {
    capture drop `splinevar'
}

// Linear spline of totalrelt with knots at 4.2, 17.2, 84.7 and 203.5
mkspline sp1 4.2 sp2 17.2 sp3 84.7 sp4 203.5 sp5 = totalrelt, displayknots  // pc: 65, 75, 90, 95

* 1) All moves (outcome: moving_ctcnty)
set more off
drop if year == 2011
sort facilityID year
xtset facilityID year

logit moving_ctcnty $m2nb $m3nb $m1nb l.($x1 $x2) l2.(sp* zero_tl) l.($z1 $z2nb $z3nb) i.stateID i.sic2digm i.year, vce(cluster tractID)
matrix b = e(b)
// Keep the first 16 coefficients with built-in submatrix extraction (no
// community-contributed matselrc needed). Column layout, following the order of
// the regressors above and assuming sp* expands to sp1-sp5 only:
//   1-2  $m2nb | 3-5  $m3nb | 6  $m1nb | 7-9  L.($x1 $x2)
//   10-15  L2.(sp1-sp5 zero_tl) | 16  L.NA
matrix bhat = b[1, 1..16]

// use 1990 years value to calculate the scores:
// score = variable x its logit coefficient, at the origin (_o) and destination (_d)
capture drop sc_*
local stubs    pc edu white vote demo lgpop
local scorevar lgpcinc_ctnb edu3_rate_ct90nb whitenonhis_ratenb voterturnoutnb demo_ratenb lgpopdensity_ctnb
forvalues j = 1/6 {
    local s : word `j' of `stubs'
    local x : word `j' of `scorevar'
    gen sc_`s'_o = `x' * bhat[1,`j'] if moving_ctcnty == 1
    gen sc_`s'_d = `x'_dest * bhat[1,`j'] if moving_ctcnty == 1
}

// NA: the origin uses the current value, the destination the next-observation value NA_f1
gen sc_na_o = NA * bhat[1,16] if moving_ctcnty == 1
gen sc_na_d = NA_f1 * bhat[1,16] if moving_ctcnty == 1

// Aggregate origin scores
gen sc_m1_o = sc_pc_o + sc_edu_o
gen sc_m2_o = sc_white_o + sc_vote_o + sc_demo_o
gen sc_m3_o = sc_lgpop_o
gen sc_m1m2m3_o = sc_m1_o + sc_m2_o + sc_m3_o
gen sc_m1m2m3z1_o = sc_m1m2m3_o + sc_na_o

// Aggregate destination scores
gen sc_m1_d = sc_pc_d + sc_edu_d
gen sc_m2_d = sc_white_d + sc_vote_d + sc_demo_d
gen sc_m3_d = sc_lgpop_d
gen sc_m1m2m3_d = sc_m1_d + sc_m2_d + sc_m3_d
gen sc_m1m2m3z1_d = sc_m1m2m3_d + sc_na_d

* Paired t-tests on the scores for all moves (origin vs. destination)
ttest sc_m1m2m3_o == sc_m1m2m3_d
ttest sc_m1m2m3z1_o == sc_m1m2m3z1_d

bysort report_bf: ttest sc_m1m2m3_o == sc_m1m2m3_d
bysort report_bf: ttest sc_m1m2m3z1_o == sc_m1m2m3z1_d
	
*

* 2) Cross-county moves only (outcome: moving_cnty)
* Original author's note: comment out the regression part of 1) before running this.
* This section relies on the xtset declaration, the year-2011 drop and the sp*
* spline terms created earlier in the file.
set more off
sort facilityID year
logit moving_cnty $m2nb $m3nb $m1nb l.($x1 $x2) l2.(sp* zero_tl) l.($z1 $z2nb $z3nb) i.stateID i.sic2digm i.year, vce(cluster tractID)

matrix b = e(b)
// Keep the first 16 coefficients with built-in submatrix extraction (no
// community-contributed matselrc needed). Column layout, following the order of
// the regressors above and assuming sp* expands to sp1-sp5 only:
//   1-2  $m2nb | 3-5  $m3nb | 6  $m1nb | 7-9  L.($x1 $x2)
//   10-15  L2.(sp1-sp5 zero_tl) | 16  L.NA
matrix bhat = b[1, 1..16]

// use 1990 years value to calculate the scores:
// score = variable x its logit coefficient, at the origin (_o) and destination (_d)
capture drop sc_*
local stubs    pc edu white vote demo lgpop
local scorevar lgpcinc_ctnb edu3_rate_ct90nb whitenonhis_ratenb voterturnoutnb demo_ratenb lgpopdensity_ctnb
forvalues j = 1/6 {
    local s : word `j' of `stubs'
    local x : word `j' of `scorevar'
    gen sc_`s'_o = `x' * bhat[1,`j'] if moving_cnty == 1
    gen sc_`s'_d = `x'_dest * bhat[1,`j'] if moving_cnty == 1
}

// NA: the origin uses the current value, the destination the next-observation value NA_f1
gen sc_na_o = NA * bhat[1,16] if moving_cnty == 1
gen sc_na_d = NA_f1 * bhat[1,16] if moving_cnty == 1

// Aggregate origin scores
gen sc_m1_o = sc_pc_o + sc_edu_o
gen sc_m2_o = sc_white_o + sc_vote_o + sc_demo_o
gen sc_m3_o = sc_lgpop_o
gen sc_m1m2m3_o = sc_m1_o + sc_m2_o + sc_m3_o
gen sc_m1m2m3z1_o = sc_m1m2m3_o + sc_na_o

// Aggregate destination scores
gen sc_m1_d = sc_pc_d + sc_edu_d
gen sc_m2_d = sc_white_d + sc_vote_d + sc_demo_d
gen sc_m3_d = sc_lgpop_d
gen sc_m1m2m3_d = sc_m1_d + sc_m2_d + sc_m3_d
gen sc_m1m2m3z1_d = sc_m1m2m3_d + sc_na_d

* Paired t-tests on the scores for cross-county moves (origin vs. destination)
ttest sc_m1m2m3_o == sc_m1m2m3_d
ttest sc_m1m2m3z1_o == sc_m1m2m3z1_d
bysort report_bf: ttest sc_m1m2m3_o == sc_m1m2m3_d
bysort report_bf: ttest sc_m1m2m3z1_o == sc_m1m2m3z1_d

*

* 3) Within-county moves only (outcome: moving_ct)
* The model is estimated on observations with maxmove_cnty == 0 and non-missing
* lgwageindcnty, lgempcnty and voterturnout1990. It uses $m31nb and $z31nb and
* omits $z1, so only four coefficients are scored and there is no NA score.
set more off

sort facilityID year
qui logit moving_ct $m2nb $m31nb $m1nb l.($x1 $x2) l2.(sp* zero_tl) l.($z2nb $z31nb) i.stateID i.sic2digm i.year if maxmove_cnty == 0 & lgwageindcnty ~= . & lgempcnty ~= . & voterturnout1990 ~= ., vce(cluster tractID)
matrix b = e(b)
// Keep the first 4 coefficients with built-in submatrix extraction (no
// community-contributed matselrc needed).
// Columns: 1-2  $m2nb | 3  $m31nb | 4  $m1nb
matrix bhat = b[1, 1..4]

// use 1990 years value to calculate the scores:
// score = variable x its logit coefficient, at the origin (_o) and destination (_d)
capture drop sc_*
local stubs    pc edu white lgpop
local scorevar lgpcinc_ctnb edu3_rate_ct90nb whitenonhis_ratenb lgpopdensity_ctnb
forvalues j = 1/4 {
    local s : word `j' of `stubs'
    local x : word `j' of `scorevar'
    gen sc_`s'_o = `x' * bhat[1,`j'] if moving_ct == 1
    gen sc_`s'_d = `x'_dest * bhat[1,`j'] if moving_ct == 1
}

// Aggregate origin scores
gen sc_m1_o = sc_pc_o + sc_edu_o
gen sc_m2_o = sc_white_o
gen sc_m3_o = sc_lgpop_o
gen sc_m1m2m3_o = sc_m1_o + sc_m2_o + sc_m3_o

// Aggregate destination scores
gen sc_m1_d = sc_pc_d + sc_edu_d
gen sc_m2_d = sc_white_d
gen sc_m3_d = sc_lgpop_d
gen sc_m1m2m3_d = sc_m1_d + sc_m2_d + sc_m3_d

* Paired t-tests on the scores for within-county moves (origin vs. destination)
ttest sc_m1m2m3_o == sc_m1m2m3_d
bysort report_bf: ttest sc_m1m2m3_o == sc_m1m2m3_d